package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.Rethrow;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;

/**
 * Common tests to all index formats.
 */
abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {

  // metadata or Directory-level objects
  private static final Set<Class<?>> EXCLUDED_CLASSES = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());

  static {
    // Directory objects: don't take into account e.g. the NIO buffers
    EXCLUDED_CLASSES.add(Directory.class);
    EXCLUDED_CLASSES.add(IndexInput.class);

    // used for thread management, not by the index itself
    EXCLUDED_CLASSES.add(CloseableThreadLocal.class);
    EXCLUDED_CLASSES.add(ThreadLocal.class);

    // don't follow references back to the top-level reader
    EXCLUDED_CLASSES.add(IndexReader.class);
    EXCLUDED_CLASSES.add(IndexReaderContext.class);

    // segment-level metadata that is shared across format producers and is not
    // expected to be accounted for by ramBytesUsed()
    EXCLUDED_CLASSES.add(FieldInfos.class);
    EXCLUDED_CLASSES.add(SegmentInfo.class);
    EXCLUDED_CLASSES.add(SegmentCommitInfo.class);
    EXCLUDED_CLASSES.add(FieldInfo.class);

    // constant overhead is typically due to strings
    EXCLUDED_CLASSES.add(String.class);
  }

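  /**
   * {@link RamUsageTester.Accumulator} that skips instances of the classes in
   * {@link #EXCLUDED_CLASSES} (unless they are the measured root object) and
   * counts collections and maps only by their object references, so that the
   * measured heap usage can be fairly compared against {@code ramBytesUsed()}.
   */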
  static class Accumulator extends RamUsageTester.Accumulator {

    private final Object root;

    Accumulator(Object root) {
      this.root = root;
    }

    @Override
    public long accumulateObject(Object o, long shallowSize, Map<java.lang.reflect.Field, Object> fieldValues, Collection<Object> queue) {
      for (Class<?> clazz = o.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
        if (EXCLUDED_CLASSES.contains(clazz) && o != root) {
          return 0;
        }
      }

      // Collections and maps are only accounted for as the memory of their
      // object references; the keys and values themselves are enqueued and
      // accumulated separately.
      long v;
      if (o instanceof Collection) {
        Collection<?> coll = (Collection<?>) o;
        queue.addAll(coll);
        v = (long) coll.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
      } else if (o instanceof Map) {
        final Map<?, ?> map = (Map<?,?>) o;
        queue.addAll(map.keySet());
        queue.addAll(map.values());
        v = 2L * map.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
      } else {
        v = super.accumulateObject(o, shallowSize, fieldValues, queue);
      }

      return v;
    }

    @Override
    public long accumulateArray(Object array, long shallowSize,
        List<Object> values, Collection<Object> queue) {
      long v = super.accumulateArray(array, shallowSize, values, queue);
      return v;
    }

  }

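  /** Returns the codec to run tests against. */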
  protected abstract Codec getCodec();

  private Codec savedCodec;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    // set the default codec, so adding test cases to this isn't fragile
    savedCodec = Codec.getDefault();
    Codec.setDefault(getCodec());
  }

  @Override
  public void tearDown() throws Exception {
    Codec.setDefault(savedCodec); // restore
    super.tearDown();
  }

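  /** Adds one or more random fields, appropriate for the format under test, to the provided document. */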
  protected abstract void addRandomFields(Document doc);

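  /** Returns the on-disk size per index file extension, excluding {@link #excludedExtensionsFromByteCounts()}. */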
  private Map<String, Long> bytesUsedByExtension(Directory d) throws IOException {
    Map<String, Long> bytesUsedByExtension = new HashMap<>();
    for (String file : d.listAll()) {
      if (IndexFileNames.CODEC_FILE_PATTERN.matcher(file).matches()) {
        final String ext = IndexFileNames.getExtension(file);
        final long previousLength = bytesUsedByExtension.containsKey(ext) ? bytesUsedByExtension.get(ext) : 0;
        bytesUsedByExtension.put(ext, previousLength + d.fileLength(file));
      }
    }
    bytesUsedByExtension.keySet().removeAll(excludedExtensionsFromByteCounts());

    return bytesUsedByExtension;
  }

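  /**
   * Returns the extensions that should be excluded from byte counts when
   * comparing indices that store the same content.
   */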
  protected Collection<String> excludedExtensionsFromByteCounts() {
    return new HashSet<String>(Arrays.asList(new String[] {
        // segment infos store various pieces of information that don't solely
        // depend on the content of the index in the diagnostics (such as a
        // timestamp), so we exclude this file from the byte counts
        "si",
        // lock files are 0 bytes (the directory creates them), they do not matter
        "lock" }));
  }

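  /**
   * Indexes random documents, force-merges to a single segment, then re-adds
   * that index into a fresh directory and checks that both directories use the
   * same number of bytes per file extension: merging must be stable and not
   * accumulate extra data across rounds.
   */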
  public void testMergeStability() throws Exception {
    Directory dir = newDirectory();
    if (dir instanceof MockDirectoryWrapper) {
      // otherwise the virus checker may prevent deletion of files and cause
      // us to see too many bytes
      ((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
    }

    // do not use newMergePolicy, which might return a MockMergePolicy that ignores the no-CFS ratio
    MergePolicy mp = newTieredMergePolicy();
    mp.setNoCFSRatio(0);
    IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random())).setUseCompoundFile(false).setMergePolicy(mp);
    IndexWriter w = new IndexWriter(dir, cfg);
    final int numDocs = atLeast(500);
    for (int i = 0; i < numDocs; ++i) {
      Document d = new Document();
      addRandomFields(d);
      w.addDocument(d);
    }
    w.forceMerge(1);
    w.commit();
    w.close();
    DirectoryReader reader = DirectoryReader.open(dir);

    Directory dir2 = newDirectory();
    if (dir2 instanceof MockDirectoryWrapper) {
      // otherwise the virus checker may prevent deletion of files and cause
      // us to see too many bytes
      ((MockDirectoryWrapper) dir2).setEnableVirusScanner(false);
    }
    mp = newTieredMergePolicy();
    mp.setNoCFSRatio(0);
    cfg = new IndexWriterConfig(new MockAnalyzer(random())).setUseCompoundFile(false).setMergePolicy(mp);
    w = new IndexWriter(dir2, cfg);
    TestUtil.addIndexesSlowly(w, reader);

    w.commit();
    w.close();

    assertEquals(bytesUsedByExtension(dir), bytesUsedByExtension(dir2));

    reader.close();
    dir.close();
    dir2.close();
  }

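  /** Tests the accuracy of the {@code ramBytesUsed()} estimations by comparing
   *  them against actual heap usage measured with {@link RamUsageTester}. */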
  @Slow
  public void testRamBytesUsed() throws IOException {
    if (Codec.getDefault() instanceof RandomCodec) {
      // exclude MockRandomPostingsFormat: it makes per-segment random choices,
      // so the two segments being compared would not use the same format
      final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
      avoidCodecs.add(new MockRandomPostingsFormat().getName());
      Codec.setDefault(new RandomCodec(random(), avoidCodecs));
    }
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(dir, cfg);

    // capture a baseline reader over a small force-merged segment after ~100 docs;
    // measuring the large reader relative to it cancels out constant per-reader overhead
    final int numDocs = atLeast(10000);
    LeafReader reader1 = null;
    for (int i = 0; i < numDocs; ++i) {
      Document d = new Document();
      addRandomFields(d);
      w.addDocument(d);
      if (i == 100) {
        w.forceMerge(1);
        w.commit();
        reader1 = getOnlySegmentReader(DirectoryReader.open(dir));
      }
    }
    w.forceMerge(1);
    w.commit();
    w.close();

    LeafReader reader2 = getOnlySegmentReader(DirectoryReader.open(dir));

    for (LeafReader reader : Arrays.asList(reader1, reader2)) {
      new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader);
    }

    final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1));
    final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed();
    final long absoluteError = actualBytes - expectedBytes;
    final double relativeError = (double) absoluteError / actualBytes;
    final String message = "RamUsageTester measured " + actualBytes + " bytes but ramBytesUsed() reported " + expectedBytes + " (" + 100 * relativeError + "% error)";
    assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);

    reader1.close();
    reader2.close();
    dir.close();
  }

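  /** Calls close() multiple times on closeable codec APIs: every consumer and
   *  producer must tolerate being closed more than once. */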
  public void testMultiClose() throws IOException {
    // first make a one-doc index
    Directory oneDocIndex = newDirectory();
    IndexWriter iw = new IndexWriter(oneDocIndex, new IndexWriterConfig(new MockAnalyzer(random())));
    Document oneDoc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectors(true);
    Field customField = new Field("field", "contents", customType);
    oneDoc.add(customField);
    oneDoc.add(new NumericDocValuesField("field", 5));
    iw.addDocument(oneDoc);
    LeafReader oneDocReader = getOnlySegmentReader(DirectoryReader.open(iw, true));
    iw.close();

    // now feed the codec APIs manually; use an FSDirectory because RAM-based
    // directories are not guaranteed to fail if you write to them after close()
    Directory dir = newFSDirectory(createTempDir("justSoYouGetSomeChannelErrors"));
    Codec codec = getCodec();

    SegmentInfo segmentInfo = new SegmentInfo(dir, Version.LATEST, "_0", 1, false, codec, Collections.<String,String>emptyMap(), StringHelper.randomId(), new HashMap<String,String>());
    FieldInfo proto = oneDocReader.getFieldInfos().fieldInfo("field");
    FieldInfo field = new FieldInfo(proto.name, proto.number, proto.hasVectors(), proto.omitsNorms(), proto.hasPayloads(),
                                    proto.getIndexOptions(), proto.getDocValuesType(), proto.getDocValuesGen(), new HashMap<String,String>());

    FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { field } );

    SegmentWriteState writeState = new SegmentWriteState(null, dir,
                                                         segmentInfo, fieldInfos,
                                                         null, new IOContext(new FlushInfo(1, 20)));

    SegmentReadState readState = new SegmentReadState(dir, segmentInfo, fieldInfos, IOContext.READ);
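
    // Each codec API below is exercised the same way: the try-with-resources
    // statement closes the consumer/producer once when the block exits, and the
    // two explicit IOUtils.close() calls close it again, so close() must be
    // safe to call multiple times.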

    // postings
    try (FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(writeState)) {
      consumer.write(oneDocReader.fields());
      IOUtils.close(consumer);
      IOUtils.close(consumer);
    }
    try (FieldsProducer producer = codec.postingsFormat().fieldsProducer(readState)) {
      IOUtils.close(producer);
      IOUtils.close(producer);
    }

    // doc values
    try (DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(writeState)) {
      consumer.addNumericField(field, Collections.<Number>singleton(5));
      IOUtils.close(consumer);
      IOUtils.close(consumer);
    }
    try (DocValuesProducer producer = codec.docValuesFormat().fieldsProducer(readState)) {
      IOUtils.close(producer);
      IOUtils.close(producer);
    }

    // norms
    try (NormsConsumer consumer = codec.normsFormat().normsConsumer(writeState)) {
      consumer.addNormsField(field, Collections.<Number>singleton(5));
      IOUtils.close(consumer);
      IOUtils.close(consumer);
    }
    try (NormsProducer producer = codec.normsFormat().normsProducer(readState)) {
      IOUtils.close(producer);
      IOUtils.close(producer);
    }

    // term vectors
    try (TermVectorsWriter consumer = codec.termVectorsFormat().vectorsWriter(dir, segmentInfo, writeState.context)) {
      consumer.startDocument(1);
      consumer.startField(field, 1, false, false, false);
      consumer.startTerm(new BytesRef("testing"), 2);
      consumer.finishTerm();
      consumer.finishField();
      consumer.finishDocument();
      consumer.finish(fieldInfos, 1);
      IOUtils.close(consumer);
      IOUtils.close(consumer);
    }
    try (TermVectorsReader producer = codec.termVectorsFormat().vectorsReader(dir, segmentInfo, fieldInfos, readState.context)) {
      IOUtils.close(producer);
      IOUtils.close(producer);
    }

    // stored fields
    try (StoredFieldsWriter consumer = codec.storedFieldsFormat().fieldsWriter(dir, segmentInfo, writeState.context)) {
      consumer.startDocument();
      consumer.writeField(field, customField);
      consumer.finishDocument();
      consumer.finish(fieldInfos, 1);
      IOUtils.close(consumer);
      IOUtils.close(consumer);
    }
    try (StoredFieldsReader producer = codec.storedFieldsFormat().fieldsReader(dir, segmentInfo, fieldInfos, readState.context)) {
      IOUtils.close(producer);
      IOUtils.close(producer);
    }

    IOUtils.close(oneDocReader, oneDocIndex, dir);
  }

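  /**
   * Tests exception handling on write and on openInput/createOutput by injecting
   * random IOExceptions through a MockDirectoryWrapper: the codec must neither
   * corrupt the index nor leak file handles when writes fail.
   */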
  public void testRandomExceptions() throws Exception {
    // disable slow things: we don't rely upon sleeps here
    MockDirectoryWrapper dir = newMockDirectory();
    dir.setThrottling(MockDirectoryWrapper.Throttling.NEVER);
    dir.setUseSlowOpenClosers(false);
    dir.setPreventDoubleWrite(false);
    dir.setRandomIOExceptionRate(0.001); // keep injected write exceptions rare

    // log all exceptions we hit, in case we fail (for debugging)
    ByteArrayOutputStream exceptionLog = new ByteArrayOutputStream();
    PrintStream exceptionStream = new PrintStream(exceptionLog, true, "UTF-8");

    Analyzer analyzer = new MockAnalyzer(random());

    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    // keep the test reproducible: merge serially
    conf.setMergeScheduler(new SerialMergeScheduler());
    conf.setCodec(getCodec());

    int numDocs = atLeast(500);

    IndexWriter iw = new IndexWriter(dir, conf);
    try {
      boolean allowAlreadyClosed = false;
      for (int i = 0; i < numDocs; i++) {
        dir.setRandomIOExceptionRateOnOpen(0.02); // turn on exceptions for openInput/createOutput

        Document doc = new Document();
        doc.add(newStringField("id", Integer.toString(i), Field.Store.NO));
        addRandomFields(doc);

        // add the doc, then delete it again by id (exercises both code paths)
        try {
          iw.addDocument(doc);
          iw.deleteDocuments(new Term("id", Integer.toString(i)));
        } catch (AlreadyClosedException ace) {
          // OK: the writer closed itself on an aborting exception; reopen it
          dir.setRandomIOExceptionRateOnOpen(0.0); // disable exceptions on open until next iteration
          assertTrue(iw.deleter.isClosed());
          assertTrue(allowAlreadyClosed);
          allowAlreadyClosed = false;
          conf = newIndexWriterConfig(analyzer);
          // keep the test reproducible: merge serially
          conf.setMergeScheduler(new SerialMergeScheduler());
          conf.setCodec(getCodec());
          iw = new IndexWriter(dir, conf);
        } catch (Exception e) {
          if (e.getMessage() != null && e.getMessage().startsWith("a random IOException")) {
            exceptionStream.println("\nTEST: got expected fake exc:" + e.getMessage());
            e.printStackTrace(exceptionStream);
            allowAlreadyClosed = true;
          } else {
            Rethrow.rethrow(e);
          }
        }

        if (random().nextInt(10) == 0) {
          // trigger a flush: sometimes via an NRT reader, sometimes via commit
          try {
            if (random().nextBoolean()) {
              DirectoryReader ir = null;
              try {
                ir = DirectoryReader.open(iw, random().nextBoolean());
                dir.setRandomIOExceptionRateOnOpen(0.0); // disable exceptions on open until next iteration
                TestUtil.checkReader(ir);
              } finally {
                IOUtils.closeWhileHandlingException(ir);
              }
            } else {
              dir.setRandomIOExceptionRateOnOpen(0.0); // disable exceptions on open until next iteration
              iw.commit();
            }
            if (DirectoryReader.indexExists(dir)) {
              TestUtil.checkIndex(dir);
            }
          } catch (AlreadyClosedException ace) {
            // OK: the writer closed itself on an aborting exception; reopen it
            dir.setRandomIOExceptionRateOnOpen(0.0); // disable exceptions on open until next iteration
            assertTrue(iw.deleter.isClosed());
            assertTrue(allowAlreadyClosed);
            allowAlreadyClosed = false;
            conf = newIndexWriterConfig(analyzer);
            // keep the test reproducible: merge serially
            conf.setMergeScheduler(new SerialMergeScheduler());
            conf.setCodec(getCodec());
            iw = new IndexWriter(dir, conf);
          } catch (Exception e) {
            if (e.getMessage() != null && e.getMessage().startsWith("a random IOException")) {
              exceptionStream.println("\nTEST: got expected fake exc:" + e.getMessage());
              e.printStackTrace(exceptionStream);
              allowAlreadyClosed = true;
            } else {
              Rethrow.rethrow(e);
            }
          }
        }
      }

      try {
        dir.setRandomIOExceptionRateOnOpen(0.0); // disable exceptions so close() can succeed
        iw.close();
      } catch (Exception e) {
        if (e.getMessage() != null && e.getMessage().startsWith("a random IOException")) {
          exceptionStream.println("\nTEST: got expected fake exc:" + e.getMessage());
          e.printStackTrace(exceptionStream);
          try {
            iw.rollback();
          } catch (Throwable t) {}
        } else {
          Rethrow.rethrow(e);
        }
      }
      dir.close();
    } catch (Throwable t) {
      System.out.println("Unexpected exception: dumping fake-exception-log:...");
      exceptionStream.flush();
      System.out.println(exceptionLog.toString("UTF-8"));
      System.out.flush();
      Rethrow.rethrow(t);
    }

    if (VERBOSE) {
      System.out.println("TEST PASSED: dumping fake-exception-log:...");
      System.out.println(exceptionLog.toString("UTF-8"));
    }
  }
}